Setup

source("scripts/utils.R")
source("scripts/settings.R")
load("data/processed/dname.RData")
load("data/processed/accessibility.RData")
load("data/processed/rna_dynamics.RData")
load("data/processed/chip_dynamics.RData")
load("data/processed/cg_density.RData")
load(paste0("data/processed/h3k4me3_data_sperm_", sperm_data, ".RData"))
chip_dfs[["Spermatid"]]$cg.density <- unname(cg_density[allgenes$gene_id])
chip_dfs[["Spermatid"]]$cg.density[is.na(chip_dfs[["Spermatid"]]$cg.density)] <- 0
chip_dfs[["Spermatid"]]$promoter.dna.methyl <- DNAme_spermatid
chip_dfs[["Sperm"]]$promoter.dna.methyl <- DNAme_sperm
chip_dfs[["Pre-ZGA"]]$promoter.dna.methyl <- DNAme_prezga
chip_dfs[["Pre-ZGA"]]$promoter.accessibility <- DamID_prezga

chip_dfs[["Spermatid"]]$expression <- log(expression.data[["Spermatid"]] + 1)
chip_dfs[["ZGA"]]$expression.6hpf <- log(expression.data[["6hpf"]] + 1)
chip_dfs[["ZGA"]]$expression.7hpf <- log(expression.data[["7hpf"]] + 1)
chip_dfs[["ZGA"]]$expression.8hpf <- log(expression.data[["8hpf"]] + 1)
chip_dfs[["ZGA"]]$expression.9hpf <- log(expression.data[["9hpf"]] + 1)
chip_dfs[["Spermatid"]]$expression[is.na(chip_dfs[["Spermatid"]]$expression)] <- 0
chip_dfs[["ZGA"]]$expression.6hpf[is.na(chip_dfs[["ZGA"]]$expression.6hpf)] <- 0
chip_dfs[["ZGA"]]$expression.7hpf[is.na(chip_dfs[["ZGA"]]$expression.7hpf)] <- 0
chip_dfs[["ZGA"]]$expression.8hpf[is.na(chip_dfs[["ZGA"]]$expression.8hpf)] <- 0
chip_dfs[["ZGA"]]$expression.9hpf[is.na(chip_dfs[["ZGA"]]$expression.9hpf)] <- 0

Violin / ECDF plots RNA dynamics

g <- plot_stage_dynamics(chip_dfs, dyns=rna_dyns, cols=cols_rna, timepoint="Pre-ZGA", type="promoter.accessibility")
grid.arrange(g)

ggsave(file=paste0("output/multiomic/pre_zga_accessibility_rna_dynamics_violin_ecdf.pdf"), g, width = 10, height = 5)

for(stage in c("Spermatid", "Sperm", "Pre-ZGA")){
  g <- plot_stage_dynamics(chip_dfs, dyns=rna_dyns, cols=cols_rna, timepoint=stage, type="promoter.dna.methyl")
  grid.arrange(g)
  ggsave(file=paste0("output/multiomic/", stage, "_dname_rna_dynamics_violin_ecdf.pdf"), g, width = 10, height = 5)
}

g <- plot_stage_dynamics(chip_dfs, dyns=rna_dyns, cols=cols_rna, timepoint="Spermatid", type="cg.density")
grid.arrange(g)

ggsave(file=paste0("output/multiomic/cg_density_rna_dynamics_violin_ecdf.pdf"), g, width = 10, height = 5)

Violin / ECDF plots RNA timing dynamics

g <- plot_stage_dynamics(chip_dfs, dyns=rna_dyns_timing, cols=cols_rna_timing, timepoint="Pre-ZGA", type="promoter.accessibility")
grid.arrange(g)

ggsave(file=paste0("output/multiomic/pre_zga_accessibility_rna_timing_dynamics_violin_ecdf.pdf"), g, width = 10, height = 5)

for(stage in c("Spermatid", "Sperm", "Pre-ZGA")){
  g <- plot_stage_dynamics(chip_dfs, dyns=rna_dyns_timing, cols=cols_rna_timing, timepoint=stage, type="promoter.dna.methyl")
  grid.arrange(g)
  ggsave(file=paste0("output/multiomic/", stage, "_dname_rna_timing_dynamics_violin_ecdf.pdf"), g, width = 10, height = 5)
}

g <- plot_stage_dynamics(chip_dfs, dyns=rna_dyns_timing, cols=cols_rna_timing, timepoint="Spermatid", type="cg.density")
grid.arrange(g)

ggsave(file=paste0("output/multiomic/cg_density_rna_timing_dynamics_violin_ecdf.pdf"), g, width = 10, height = 5)

Violin / ECDF plots Chip dynamics

g <- plot_stage_dynamics(chip_dfs, dyns=chip_dyns, cols=cols_chip, timepoint="Pre-ZGA", type="promoter.accessibility")
grid.arrange(g)

ggsave(file=paste0("output/multiomic/pre_zga_accessibility_chip_dynamics_violin_ecdf.pdf"), g, width = 10, height = 5)

for(stage in c("Spermatid", "Sperm", "Pre-ZGA")){
  g <- plot_stage_dynamics(chip_dfs, dyns=chip_dyns, cols=cols_chip, timepoint=stage, type="promoter.dna.methyl")
  grid.arrange(g)
  ggsave(file=paste0("output/multiomic/", stage, "_dname_chip_dynamics_violin_ecdf.pdf"), g, width = 10, height = 5)
}

g <- plot_stage_dynamics(chip_dfs, dyns=chip_dyns, cols=cols_chip, timepoint="Spermatid", type="cg.density")
grid.arrange(g)

ggsave(file=paste0("output/multiomic/cg_density_chip_dynamics_violin_ecdf.pdf"), g, width = 10, height = 5)

Heatmap per H3K4me3/RNA dynamic

pure_labels <- c("Spermatid.promoter.dna.methyl" = "Spermatid", 
                 "Sperm.promoter.dna.methyl"="Sperm", 
                 "Pre.ZGA.promoter.dna.methyl"="Pre-ZGA", 
                 "Pre.ZGA.promoter.accessibility"="Pre-ZGA", 
                 "ZGA.expression.9hpf" = "9hpf", 
                 "ZGA.expression.8hpf" = "8hpf", 
                 "ZGA.expression.7hpf" = "7hpf",
                 "ZGA.expression.6hpf" = "6hpf",
                 "CG.density" ="",
                 "Spermatid.peak.width" = "Spermatid",
                 "Sperm.peak.width" = "Sperm",
                 "Pre.ZGA.peak.width" = "Pre-ZGA",
                 "ZGA.peak.width" = "ZGA",
                 "Spermatid.promoter.mean" = "Spermatid",
                 "Sperm.promoter.mean" = "Sperm",
                 "Pre.ZGA.promoter.mean" = "Pre-ZGA",
                 "ZGA.promoter.mean" = "ZGA",
                 "Spermatid.expression" = "Spermatid")

chip_dyn_map = melt(chip_dyns)
rownames(chip_dyn_map) <- chip_dyn_map[,1]
label_dyn <- function(gene, dyns){for(i in 1:length(dyns)){if(any(gene %in% dyns[[i]])){return(names(dyns)[i])}}; return(NA)}
col_fun = colorRamp2(seq(-2, 2), rev(COL2("RdBu", n=5)))

intersection_list = list()
for (chip_dyn in names(chip_dyns)) {
  for (rna_dyn in names(rna_dyns)) {
    intersection_name <- paste(chip_dyn, rna_dyn, sep = " & ")
    intersection_list[[intersection_name]] <- intersect(chip_dyns[[chip_dyn]], rna_dyns[[rna_dyn]])
  }
}

heatmap_data <- data.frame("Spermatid.peak.width" = chip_dfs[["Spermatid"]][,c("peak.width")],
                           "Spermatid.promoter.mean" = chip_dfs[["Spermatid"]][,c("promoter.mean")],
                           "Spermatid.promoter.dna.methyl" = chip_dfs[["Spermatid"]][,c("promoter.dna.methyl")],
                           "Sperm.peak.width" = chip_dfs[["Sperm"]][,c("peak.width")],
                           "Sperm.promoter.mean" = chip_dfs[["Sperm"]][,c("promoter.mean")],
                           "Sperm.promoter.dna.methyl" = chip_dfs[["Sperm"]][,c("promoter.dna.methyl")],
                           "Pre-ZGA.peak.width" = chip_dfs[["Pre-ZGA"]][,c("peak.width")],
                           "Pre-ZGA.promoter.mean" = chip_dfs[["Pre-ZGA"]][,c("promoter.mean")],
                           "Pre-ZGA.promoter.dna.methyl" = chip_dfs[["Pre-ZGA"]][,c("promoter.dna.methyl")],
                           "Pre-ZGA.promoter.accessibility" = chip_dfs[["Pre-ZGA"]][,c("promoter.accessibility")],
                           "ZGA.peak.width" = chip_dfs[["ZGA"]][,c("peak.width")],
                           "ZGA.promoter.mean" = chip_dfs[["ZGA"]][,c("promoter.mean")],
                           "Spermatid.expression" = chip_dfs[["Spermatid"]][,c("expression")],
                           "ZGA.expression.6hpf" = chip_dfs[["ZGA"]][,c("expression.6hpf")],
                           "ZGA.expression.7hpf" = chip_dfs[["ZGA"]][,c("expression.7hpf")],
                           "ZGA.expression.8hpf" = chip_dfs[["ZGA"]][,c("expression.8hpf")],
                           "ZGA.expression.9hpf" = chip_dfs[["ZGA"]][,c("expression.9hpf")],
                           "CG.density" = chip_dfs[["Spermatid"]][,c("cg.density")],
                           "RNA.group" = sapply(allgenes$gene_id, label_dyn, dyns = rna_dyns),
                           "RNA.group.timing" = sapply(allgenes$gene_id, label_dyn, dyns = rna_dyns_timing),
                           "Dynamic" = sapply(allgenes$gene_id, label_dyn, dyns = intersection_list),
                           "Chip.group.detail" = sapply(allgenes$gene_id, label_dyn, dyns = chip_dyns_detail),
                           "Chip.group" = sapply(allgenes$gene_id, label_dyn, dyns = chip_dyns))

#heatmap_data$RNA.group[is.na(heatmap_data$RNA.group)] <- "ND"
#heatmap_data$RNA.group <- factor(heatmap_data$RNA.group, levels = c("GS", "GZ", "ZS", "ND"))
clustering=FALSE

RNA dynamics heatmap

mean_data <- aggregate(. ~  RNA.group, data=heatmap_data[!colnames(heatmap_data) %in% c("Chip.group", "Dynamic", "RNA.group.timing", "Chip.group.detail")], FUN=mean)
rownames(mean_data) <- mean_data[,"RNA.group"]
mean_data <- mean_data[,-1]
scaled_data = as.matrix(scale(mean_data))
scaled_data <- t(scaled_data)
rownames(scaled_data) <- pure_labels[rownames(scaled_data)]
hm_rna <- Heatmap(scaled_data, 
        name = "Mean (z-score)",
        col = col_fun,
        #column_title = "Datasets", row_title = "Genes",
        show_row_dend = clustering,
        cluster_columns = clustering,
        show_row_names = TRUE,
        cluster_rows = clustering,
        row_split = c("Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation","Accessibility", "Peak width", "Promoter mean", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "CG density"),
        row_title_rot = 0,
)
draw(hm_rna)

pdf("output/multiomic/rna_dynamics_multiomic_heatmap.pdf", width=8, height=6)
draw(hm_rna)
dev.off()
## quartz_off_screen 
##                 2
mean_data <- aggregate(. ~  RNA.group.timing, data=heatmap_data[!colnames(heatmap_data) %in% c("Chip.group", "Dynamic", "Chip.group.detail", "RNA.group")], FUN=mean)
rownames(mean_data) <- mean_data[,"RNA.group.timing"]
mean_data <- mean_data[,-1]
scaled_data = as.matrix(scale(mean_data))
scaled_data <- t(scaled_data)
rownames(scaled_data) <- pure_labels[rownames(scaled_data)]
hm_rna <- Heatmap(scaled_data, 
        name = "Mean (z-score)",
        col = col_fun,
        #column_title = "Datasets", row_title = "Genes",
        show_row_dend = clustering,
        cluster_columns = clustering,
        show_row_names = TRUE,
        cluster_rows = clustering,
        row_split = c("Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation","Accessibility", "Peak width", "Promoter mean", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "CG density"),
        row_title_rot = 0,
)
draw(hm_rna)

pdf("output/multiomic/rna_dynamics_timing_multiomic_heatmap.pdf", width=8, height=6)
draw(hm_rna)
dev.off()
## quartz_off_screen 
##                 2

Chip dynamics heatmap

mean_data <- aggregate(. ~  Chip.group, data=heatmap_data[!colnames(heatmap_data) %in% c("RNA.group", "Dynamic", "ZGA.peak.width", "Pre.ZGA.peak.width", "Spermatid.peak.width", "Sperm.peak.width", "ZGA.promoter.mean", "Pre.ZGA.promoter.mean", "Spermatid.promoter.mean", "Sperm.promoter.mean", "Chip.group.detail", "RNA.group.timing")], FUN=mean)
rownames(mean_data) <- mean_data[,"Chip.group"]
mean_data <- mean_data[,-1]
scaled_data = as.matrix(scale(mean_data))
scaled_data <- t(scaled_data)
rownames(scaled_data) <- pure_labels[rownames(scaled_data)]
hm_chip <- Heatmap(scaled_data, 
        name = "Mean (z-score)",
        col = col_fun,
        #column_title = "Datasets", row_title = "Genes",
        show_row_dend = clustering,
        cluster_columns = clustering,
        show_row_names = TRUE,
        cluster_rows = clustering,
        row_split = c("DNA methylation", "DNA methylation", "DNA methylation", "Accessibility", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "CG density"),
        row_title_rot = 0,
)
draw(hm_chip)

pdf("output/multiomic/chip_dynamics_multiomic_heatmap.pdf", width=8, height=6)
draw(hm_chip)
dev.off()
## quartz_off_screen 
##                 2
mean_data <- aggregate(. ~  Chip.group.detail, data=heatmap_data[!colnames(heatmap_data) %in% c("RNA.group", "Dynamic", "ZGA.peak.width", "Pre.ZGA.peak.width", "Spermatid.peak.width", "Sperm.peak.width", "ZGA.promoter.mean", "Pre.ZGA.promoter.mean", "Spermatid.promoter.mean", "Sperm.promoter.mean", "Chip.group", "RNA.group.timing")], FUN=mean)
rownames(mean_data) <- mean_data[,"Chip.group.detail"]
mean_data <- mean_data[,-1]
scaled_data = as.matrix(scale(mean_data))
scaled_data <- t(scaled_data)
rownames(scaled_data) <- pure_labels[rownames(scaled_data)]
hm_chip <- Heatmap(scaled_data, 
        name = "Mean (z-score)",
        col = col_fun,
        #column_title = "Datasets", row_title = "Genes",
        show_row_dend = clustering,
        cluster_columns = clustering,
        show_row_names = TRUE,
        cluster_rows = clustering,
        row_split = c("DNA methylation", "DNA methylation", "DNA methylation", "Accessibility", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "CG density"),
        row_title_rot = 0,
)
draw(hm_chip)

pdf("output/multiomic/chip_dynamics_detail_multiomic_heatmap.pdf", width=8, height=6)
draw(hm_chip)
dev.off()
## quartz_off_screen 
##                 2
mean_data <- aggregate(. ~ Dynamic, data=heatmap_data[!colnames(heatmap_data) %in% c("RNA.group", "Chip.group", "RNA.group.timing", "Chip.group.detail")], FUN=mean)
rownames(mean_data) <- mean_data[,"Dynamic"]
mean_data <- mean_data[,-1]
scaled_data = as.matrix(scale(mean_data))
scaled_data <- t(scaled_data)
rownames(scaled_data) <- pure_labels[rownames(scaled_data)]
hm <- Heatmap(scaled_data, 
        name = "Mean (z-score)",
        col = col_fun,
        #column_title = "Datasets", row_title = "Genes",
        show_row_dend = clustering,
        cluster_columns = clustering,
        show_row_names = TRUE,
        cluster_rows = clustering,
        row_split = c("Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation", "Peak width", "Promoter mean", "DNA Methylation","Accessibility", "Peak width", "Promoter mean", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "Expression levels", "CG density"),
        row_title_rot = 0,
)
draw(hm)

pdf("output/multiomic/rna_chip_intersection_dynamics_multiomic_heatmap.pdf", width=8, height=6)
draw(hm)
dev.off()
## quartz_off_screen 
##                 2

Correlation plot

cor_labels <- c("Spermatid.promoter.dna.methyl" = "Spermatid DNAme", 
                 "Sperm.promoter.dna.methyl"="Sperm DNAme", 
                 "Pre.ZGA.promoter.dna.methyl"="Pre-ZGA DNAme", 
                 "Pre.ZGA.promoter.accessibility"="Pre-ZGA Accessibility", 
                 "ZGA.expression.9hpf" = "9hpf RNA expression", 
                 "ZGA.expression.8hpf" = "8hpf RNA expression", 
                 "ZGA.expression.7hpf" = "7hpf RNA expression",
                 "ZGA.expression.6hpf" = "6hpf RNA expression",
                 "CG.density" ="CG density",
                 "Spermatid.peak.width" = "Spermatid peak width",
                 "Sperm.peak.width" = "Sperm peak width",
                 "Pre.ZGA.peak.width" = "Pre-ZGA peak width",
                 "ZGA.peak.width" = "ZGA peak width",
                 "Spermatid.promoter.mean" = "Spermatid promoter mean",
                 "Sperm.promoter.mean" = "Sperm promoter mean",
                 "Pre.ZGA.promoter.mean" = "Pre-ZGA promoter mean",
                 "ZGA.promoter.mean" = "ZGA promoter mean",
                 "Spermatid.expression" = "Spermatid RNA expression")

df <- heatmap_data[,!colnames(heatmap_data) %in% c("RNA.group", "Chip.group", "Chip.group.detail", "RNA.group.timing", "Dynamic")]
df <- na.omit(df)
colnames(df) <- cor_labels[colnames(df)]

plot_corrplot <- function(){
  corrplot(cor(df, method="pearson"),
         method = "color",       
         order = "FPC",
         col= rev(COL2('RdBu', 200)),
         tl.col = "black")}

plot_corrplot()

pdf('output/multiomic/corr_plot.pdf', width=10, height=12)
plot_corrplot()
dev.off()
## quartz_off_screen 
##                 2

PCA plot

only_prezga = TRUE

complete_data <- heatmap_data
if(only_prezga){
  pca_input <- complete_data[,colnames(complete_data) %in% c("Pre.ZGA.promoter.mean", "Pre.ZGA.peak.width", "Pre.ZGA.promoter.dna.methyl", "Pre.ZGA.promoter.accessibility")]
} else {
  pca_input <- complete_data[,!colnames(complete_data) %in% c("RNA.group", "Chip.group")]
}
df <- na.omit(pca_input)
  
color.groups = complete_data[!rownames(pca_input) %in% names(na.action(df)),"Chip.group"]
cols = cols_chip

#color.groups = complete_data[!rownames(pca_input) %in% names(na.action(df)),"RNA.group"]
#cols = cols_rna

data.pca <- princomp(scale(df), cor=TRUE)
#summary(data.pca)
fviz_eig(data.pca, addlabels = TRUE)

fviz_contrib(data.pca, choice = "var", axes = 1:2)

fviz_pca_var(data.pca, col.var = "contrib",
            gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
            repel = TRUE)

fviz_pca_ind(data.pca,
             col.ind = color.groups, 
             palette = cols,
             label="none",
             pointsize = 0.5,
             addEllipses = TRUE, # Concentration ellipses
             ellipse.type = "norm",
             legend.title = "Dynamics")
## Warning: Removed 2125 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

Unsupervised clustering heatmap

genes = allgenes$gene_id
modalities = c("promoter.mean" = "H3K4me3 mean", "peak.width" = "H3K4me3 peak width", "promoter.accessibility"="Accessibility", "cg.density"="CG density", "promoter.dna.methyl"="DNA methylation")

hm_data <- cbind(chip_dfs[["Pre-ZGA"]][,c("promoter.mean", "peak.width", "promoter.dna.methyl", "promoter.accessibility")], chip_dfs[["Spermatid"]]['cg.density'])
#heatmap_data <- heatmap_data[sample(1:nrow(heatmap_data), 1000, replace=FALSE),]
colnames(hm_data) <- modalities[colnames(hm_data)]
scaled_data = as.matrix(scale(hm_data))

set.seed(12)
kclus <- kmeans(scaled_data, 8)
#cluster_order = c(4,5,2,6,8,7,3,1)
cluster_order = c(8,3,7,1,2,4,6,5)
split <- factor(cluster_order[kclus$cluster]) #levels=cluster_order)

col_fun = colorRamp2(seq(-2, 2), rev(COL2("RdBu", n=5)))
hm1 <- Heatmap(t(scaled_data), 
        name = "Mean (z-score)",
        col = col_fun,
        #column_title = "Datasets", row_title = "Genes",
        show_row_dend = FALSE,
        cluster_columns = FALSE,
        show_row_names = TRUE,
        show_column_names = FALSE,
        #column_order = c("promoter.enrich", "peak.width", "promoter.dna.methyl", "promoter.accessibility"),
        column_split = split,
        cluster_row_slices = FALSE
)

draw(hm1)

pdf('output/multiomic/unsupervised_clustering_heatmap.pdf', width=9, height=3)
draw(hm1)
dev.off()
## quartz_off_screen 
##                 2